A data set on Middle East Respiratory Syndrome Coronavirus (MERS-CoV), made available by Andrew Rambaut, was used to practice techniques in RStudio. As such, this document contains various practice plots.
This project used RStudio for statistical analysis.
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
No user-defined functions were written for this analysis of the MERS-CoV data.
# Load the MERS-CoV case list from the local project folder.
# NOTE(review): setwd() to an absolute, user-specific path only works on the
# original author's machine -- consider a project-relative path.
setwd("C:/Users/msb75462/Desktop/mers")
mers <- read.csv("cases.csv")

# Manual clean-up by row position, applied in this order: overwrite the
# hospitalization date stored in row 890, then drop row 471.
# NOTE(review): the reason for these two corrections is not recorded here and
# both depend on the CSV's row order -- confirm against the data.
mers$hospitalized[890] <- "2015-02-20"
mers <- mers[-471, ]

# Parse the hospitalization date text (year-month-day) into Date values.
mers$hospitalized2 <- ymd(mers$hospitalized)
## Warning: 5 failed to parse.
# Parse the symptom-onset date text (year-month-day) into Date values, then
# print the class of the new column as a quick sanity check.
mers[["onset2"]] <- ymd(mers$onset)
class(mers[["onset2"]])
## [1] "Date"
# Day zero is the earliest recorded onset date; missing dates are ignored.
day0 <- min(mers$onset2, na.rm = TRUE)

# Epidemic day: number of days between day zero and each case's onset date.
mers$epi.day <- as.numeric(mers$onset2 - day0)
Epidemic curves
# Epidemic curve: case count per epidemic day, bars filled by country.
ggplot(mers, aes(x = epi.day, fill = country)) +
  geom_bar() +
  labs(
    title = "Global count of MERS cases by date of symptom onset",
    x = "Epidemic day",
    y = "Case count",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
## Warning: Removed 535 rows containing non-finite values (stat_count).
## Warning: position_stack requires non-overlapping x intervals
This plot shows the global count of MERS cases by epidemic day, with a legend of countries on the side.
# Epidemic curve: case count per epidemic day, bars filled by gender.
ggplot(mers, aes(x = epi.day, fill = gender)) +
  geom_bar() +
  labs(
    title = "Global count of MERS cases by date of symptom onset",
    x = "Epidemic day",
    y = "Case count",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
## Warning: Removed 535 rows containing non-finite values (stat_count).
This plot shows the global count of MERS cases by epidemic day, with a legend corresponding to the male and female genders on the side.
Univariate Plot
# Raw infectious period: days from symptom onset to hospitalization.
# Kept as a difftime here; negative when onset follows hospitalization.
mers$infectious.period <- difftime(
  mers$hospitalized2,
  mers$onset2,
  units = "days"
)
This is the raw infectious period.
Histogram
# Inspect the type of the raw infectious period before converting it.
class(mers[["infectious.period"]])
## [1] "difftime"
# Convert the difftime to a plain number of days so it can be binned.
mers$infectious.period <- as.numeric(mers$infectious.period, units = "days")

# Histogram of the raw infectious period (default binning).
ggplot(mers, aes(x = infectious.period)) +
  geom_histogram() +
  labs(
    title = "Distribution of calculated MERS infectious period",
    x = "Infectious period",
    y = "Frequency",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 727 rows containing non-finite values (stat_bin).
Nosocomial Infection
# Clamp negative infectious periods to zero; missing values stay missing.
mers$infectious.period2 <- pmax(mers$infectious.period, 0)

# Histogram of the clamped infectious period (default binning).
ggplot(mers, aes(x = infectious.period2)) +
  geom_histogram() +
  labs(
    title = "Distribution of calculated MERS infectious period",
    x = "Infectious period",
    y = "Frequency",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 727 rows containing non-finite values (stat_bin).
This is an updated infectious period: because MERS is a nosocomial infection, negative values (symptom onset after hospitalization) are set to zero.
Density Plot
# Kernel density estimate of the clamped infectious period.
# The y-axis of geom_density() is a probability density, not a count, so the
# axis is labelled "Density" rather than "Frequency".
ggplot(data = mers) +
  geom_density(mapping = aes(x = infectious.period2)) +
  labs(
    x = "Infectious period",
    y = "Density",
    title = "Probability density for MERS infectious period (positive values only)",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
## Warning: Removed 727 rows containing non-finite values (stat_density).
Area Plot
# Area plot of binned counts of the clamped infectious period.
ggplot(mers, aes(x = infectious.period2)) +
  geom_area(stat = "bin") +
  labs(
    title = "Area plot for MERS infectious period (positive values only)",
    x = "Infectious period",
    y = "Frequency",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 727 rows containing non-finite values (stat_bin).
Bivariate Plot
# Infectious period against epidemic day: smoother drawn first, points on top.
ggplot(mers, aes(x = epi.day, y = infectious.period2)) +
  geom_smooth() +
  geom_point() +
  labs(
    title = "MERS infectious period (positive values only) over time",
    x = "Epidemic day",
    y = "Infectious period",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 727 rows containing non-finite values (stat_smooth).
## Warning: Removed 727 rows containing missing values (geom_point).
Faceting - Multipanel Plot
# One panel per country; the y-axis is limited to 0-50, so points outside that
# range are dropped from the plot.
ggplot(mers, aes(x = epi.day, y = infectious.period2, color = country)) +
  geom_point() +
  facet_wrap(~ country) +
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    title = "MERS infectious period (positive values only) over time",
    x = "Epidemic day",
    y = "Infectious period",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
## Warning: Removed 728 rows containing missing values (geom_point).
Separate Plots for Male and Female
# Grid of panels (gender in rows, country in columns), restricted to cases
# with gender M or F in the seven listed countries. The y-axis is limited to
# 0-50, so points outside that range are dropped from the plot.
ggplot(
  data = mers[
    mers$gender %in% c("M", "F") &
      mers$country %in% c(
        "KSA", "Oman", "Iran", "Jordan", "Qatar", "South Korea", "UAE"
      ),
  ],
  mapping = aes(x = epi.day, y = infectious.period2, color = country)
) +
  geom_point() +
  facet_grid(gender ~ country) +
  scale_y_continuous(limits = c(0, 50)) +
  labs(
    title = "MERS infectious period by gender and country",
    x = "Epidemic day",
    y = "Infectious period",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )
## Warning: Removed 692 rows containing missing values (geom_point).
Interactive Plot
# Build the overall epidemic curve (case count per epidemic day) in two steps:
# first the bars, then the labels.
epi.curve <- ggplot(data = mers) +
  geom_bar(mapping = aes(x = epi.day))
epi.curve <- epi.curve +
  labs(
    title = "Global count of MERS cases by date of symptom onset",
    x = "Epidemic day",
    y = "Case count",
    caption = "Data from: https://github.com/rambaut/MERS-Cases/blob/gh-pages/data/cases.csv"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(epi.curve)
## Warning: Removed 535 rows containing non-finite values (stat_count).
This study reflected………